# R Options
options(stringsAsFactors=FALSE)
# Required libraries
library(Seurat) # main
library(ggplot2) # plots
library(patchwork) # combination of plots
library(magrittr) # %>% operator
# Other libraries we use
# Knit: knitr
# Data handling: dplyr, tidyr, purrr
# Tables: kableExtra
# Plots: ggsci
# Source plotting functions
source("R/functions_io.R")
source("R/functions_plotting.R")
source("R/functions_analysis.R")
source("R/functions_util.R")
# Knitr default options
knitr::opts_chunk$set(echo=TRUE, cache=FALSE, message=FALSE, warning=FALSE, fig.width=10)Open this code chunk to read all parameters that are set specifically for your project.
param = list()
# Project ID
param$project = "HTO_testDataset"
# Input data path in case Cell Ranger was run
param$path_data = "test_datasets/10x_pbmc_hto_GSE108313/counts"
# Output path
param$path_out = "test_datasets/10x_pbmc_hto_GSE108313/demultiplexed"
if (!file.exists(param$path_out)) dir.create(param$path_out, recursive=TRUE)
# HTO names
# HTOs have an ID that is included in the 'features.tsv' input file
# We additionally ask for readable names that are used throughout this report
# This could look like this, where HTO1-3 are the IDs included raw dataset
# param$hto.names = setNames(c("NameA", "NameB", "NameC"), c("HTO1", "HTO2", "HTO3"))
param$hto_names = setNames(c("htoA","htoB","htoC","htoD","htoE","htoF","htoG","htoH"),
c("htoA","htoB","htoC","htoD","htoE","htoF","htoG","htoH")) # Name equals ID in this test case
# Prefix of mitochondrial genes
param$mt = "^MT-"
# Main color to use for plots
param$col = "palevioletred"
# Sample data to at most n cells (mainly for tests); set to NULL to deactivate
param$sample_cells = 3000param$col_hto_global = ggsci::pal_npg()(3)
param$col_hto_all = ggsci::pal_npg()(factorial(length(param$hto_names)) + 1)
param$col_hto_collapsed = ggsci::pal_npg()(length(param$hto_names) + 2)
hto_levels = c("Negative", "Doublet", param$hto_names[length(param$hto_names):1])
names(param$col_hto_collapsed) = hto_levelsIn this first section of the report, we read 10X data from the files produced by CellRanger:
and setup a Seurat object. This object includes different data types in separate assays:
Note that ‘Antibody Capture’ features can correspond to ‘HTO’ and ‘ADT’ and are distinguished based on provided HTO names.
# Load the dataset with its assays and create a Seurat object; pass hto_names so that HTO will be a separate assay
sc = Read10xDataset(param$path_data, project=param$project, row_name_column=1, hto_names=param$hto_names)
# If requested: sample at most n cells
if (!is.null(param$sample_cells)) {
sampled_barcodes = sample(Seurat::Cells(sc), min(param$sample_cells, length(Seurat::Cells(sc))))
sc = subset(sc, cells=sampled_barcodes)
}
# Discard cells in other assays which have no HTO counts
original_assay_names = setdiff(Seurat::Assays(sc), "HTO")
assay_cells_hto_zero = Seurat::GetAssayData(sc, assay="HTO", slot="counts") %>%
as.data.frame %>%
colSums() == 0
if (sum(assay_cells_hto_zero)>0) {
sc = subset(sc, cells=which(assay_cells_hto_zero))
warning("Discarded ", sum(assay_cells_hto_zero), " cells with 0 HTO counts.")
}
# Remember original assay names
original_assay_names = setdiff(Seurat::Assays(sc), "HTO") summary = purrr::map_dfr(original_assay_names, function(a) {
data.frame(Assay=a,
CellsTotal=length(assay_cells_hto_zero),
CellsWithHtoReads=sum(!assay_cells_hto_zero),
CellsWithoutHtoReads=sum(assay_cells_hto_zero))
})
knitr::kable(summary, align="l", caption="Dataset summary") %>%
kableExtra::kable_styling(bootstrap_options=c("striped", "hover"), full_width=FALSE, position="left")| Assay | CellsTotal | CellsWithHtoReads | CellsWithoutHtoReads |
|---|---|---|---|
| RNA | 3000 | 3000 | 0 |
This section of the report shows how cells are assigned to their sample-of-origin.
We start the analysis by normalising raw HTO counts. HTO counts for each cell are divided by the total counts for that cell and multiplied by 10,000. This is then natural-log transformed.
We assign cells to their sample-of-origin, annotate negative cells that cannot be assigned to any sample, and doublet cells that are assigned to two samples.
# Demultiplex HTOs
sc = Seurat::HTODemux(sc, assay="HTO", positive.quantile=0.95)
# Sort Idents levels for nicer plotting
Seurat::Idents(sc) = factor(Seurat::Idents(sc), levels=hto_levels)
sc$hash.ID = factor(sc$hash.ID, levels=hto_levels)
# HTO classification results
hash_ID_table = sc@meta.data %>%
dplyr::count(hash.ID) %>%
dplyr::rename(HTO=hash.ID) %>%
dplyr::mutate(Perc=round(n/sum(n)*100,2)) %>%
as.data.frame
p1 = ggplot(sc@meta.data %>% dplyr::count(HTO_classification.global),
aes(x="", y=n, fill=HTO_classification.global)) +
geom_bar(width=1, stat="identity") +
coord_polar("y", start=0) +
xlab("") + ggplot2::ylab("")
p1 = PlotMystyle(p=p1, title="HTO global classification results", col=param$col_hto_global)
p2 = ggplot(sc@meta.data %>% dplyr::count(hash.ID),
aes(x="", y=n, fill=hash.ID)) +
geom_bar(width=1, stat="identity") +
coord_polar("y", start=0) +
xlab("") + ylab("")
p2 = PlotMystyle(p=p2, title="HTO classification results", col=param$col_hto_collapsed)
p = p1 + p2 +
patchwork::plot_annotation(title="HTO classification results") +
gridExtra::tableGrob(hash_ID_table, rows=NULL)
pThis section of the report visualises raw and normalised HTO data to understand whether the demultiplexing step has worked well.
# Distribution of HTO counts before and after normalisation
hto_t_raw = Seurat::GetAssayData(sc, assay="HTO", slot="counts") %>%
as.data.frame %>% t %>% as.data.frame
hto_t_raw_pseudo = hto_t_raw + 1
hto_t_norm = Seurat::GetAssayData(sc, assay="HTO", slot="data") %>%
as.data.frame %>% t %>% as.data.frame
p1 = ggplot(hto_t_raw_pseudo %>% tidyr::pivot_longer(tidyr::everything()), aes(x=name, y=value, fill=name)) +
geom_violin() +
scale_y_continuous(trans="log2") +
xlab("") + ylab("")
p1 = PlotMystyle(p1, title="HTO raw counts", col=param$col_hto_collapsed, legend_title="HTO")
p2 = ggplot(hto_t_norm %>% tidyr::pivot_longer(tidyr::everything()),
aes(x=name, y=value, fill=name)) +
geom_violin() +
xlab("") + ylab("")
p2 = PlotMystyle(p2, title="HTO normalised counts", col=param$col_hto_collapsed, legend_title="HTO")
p = p1 + p2 & theme(legend.position="bottom")
p = p + patchwork::plot_annotation("HTO counts before and after normalisation") +
patchwork::plot_layout(guides = "collect")
pPairs of raw (top) and normalised (bottom) HTO counts are visualised to confirm mutal exclusivity in singlet cells. Data points correspond to measured HTO counts per HTO, colours correspond to the assigned samples-of-origin.
n = DfAllColumnCombinations(x=hto_t_raw_pseudo, cell_classification=sc$hash.ID)
# plot
p = ggplot(n, aes(x=value1, y=value2, color=cell_classification)) +
geom_point() +
scale_x_continuous(trans="log2") + scale_y_continuous(trans="log2") +
scale_color_manual(values=param$col_hto_collapsed)
p = PlotMystyle(p)
p = p + facet_grid(name2~name1, drop=FALSE) +
theme(axis.title.x=element_blank(), strip.text.x=element_text(size=10, color="black"),
axis.title.y=element_blank(), strip.text.y=element_text(size=10, color="black"),
strip.background = element_rect(colour="white", fill="lightgrey"),
legend.position="bottom",
axis.text.x=element_text(angle=45, hjust=1, vjust=0.5)) +
patchwork::plot_annotation("Raw HTO counts")
pn = DfAllColumnCombinations(x=hto_t_norm, cell_classification=sc$hash.ID)
p = ggplot(n, aes(x=value1, y=value2, color=cell_classification)) +
geom_point() +
scale_x_continuous(trans="log2") + scale_y_continuous(trans="log2") +
scale_color_manual(values=param$col_hto_collapsed)
p = PlotMystyle(p)
p = p + facet_grid(name2~name1, drop=FALSE) +
theme(axis.title.x=element_blank(), strip.text.x=element_text(size=10, color="black"),
axis.title.y=element_blank(), strip.text.y=element_text(size=10, color="black"),
strip.background = element_rect(colour="white", fill="lightgrey"),
legend.position="bottom") +
patchwork::plot_annotation("Normalised HTO data")
pThe following ridge plots visualise the enrichment of assigned sample-of-origin for the respective normalised HTO counts.
# Group cells based on HTO classification
p = RidgePlot(sc, assay="HTO", features=rownames(Seurat::GetAssay(sc, assay="HTO")),
same.y.lims=TRUE, cols=param$col_hto_collapsed, combine=FALSE)
for (i in 1:length(p)) p[[i]] = PlotMystyle(p[[i]], legend_title="Classified cells")
p = patchwork::wrap_plots(p, ncol = 2) +
patchwork::plot_annotation("Normalised HTO data") +
patchwork::plot_layout(guides = "collect") &
theme(legend.position="bottom")
pLastly, we compare the number of features between classified cells.
# Number of features in the different cells
nfeature_metrics = grep("_HTO",
grep("nFeature_", colnames(sc[[]]), v=TRUE),
v=TRUE,
invert=TRUE)
p = VlnPlot(sc, features=nfeature_metrics, idents=levels(Seurat::Idents(sc)), pt.size=0) +
geom_violin(color=NA) +
xlab("")
p = PlotMystyle(p,
title="Number of features for HTO-classified cells",
col=param$col_hto_collapsed,
legend_title="Classified cells",
legend_position="bottom")
pThis section of the report states the number of cells that remain after negative and doublet cells are removed.
## An object of class Seurat
## 18984 features across 2143 samples within 2 assays
## Active assay: RNA (18976 features, 0 variable features)
## 1 other assay present: HTO
This section of the report provides first insights into your RNA dataset based on a preliminary pre-processing of the RNA data using the standard scRNA-seq workflow.
sc.all = prelim.analysis(sc=sc.all, mt=param$mt, pc_n=10)
sc = prelim.analysis(sc=sc, mt=param$mt, pc_n=10)We use a UMAP to visualise and explore a dataset. The goal is to place similar cells together in 2D space, and learn about the biology underlying the data. Cells are color-coded according to the assigned sample-of-origin.
Take care not to mis-read a UMAP:
For a nice read to intuitively understand UMAP, see https://pair-code.github.io/understanding-umap/.
Finally, demultiplexed RNA data are written back to file.
# Save each sample in a separate directory
samples = levels(Seurat::Idents(sc))
demux_samples_paths = c()
for (s in samples) {
p = ExportSeuratAssayData(sc[,Seurat::Idents(sc)==s],
dir=file.path(param$path_out, s),
assays=original_assay_names,
slot="counts",
include_cell_metadata_cols=c("HTO_maxID",
"HTO_classification",
"HTO_classification.global",
"hash.ID"))
demux_samples_paths = c(demux_samples_paths, p)
}
message(demux_samples_paths)This report was generated using the scrnaseq GitHub repository. Software versions were collected at run time.
out = scrnaseq_session_info()
knitr::kable(out, align="l") %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"), full_width=FALSE, position="left")| Name | Version |
|---|---|
| ktrns/scrnaseq | 2b34f5ed4486ec6266c662a2230f57d6f7ce9fad |
| R | R version 3.6.1 (2019-07-05) |
| Platform | x86_64-apple-darwin15.6.0 (64-bit) |
| Operating system | macOS Mojave 10.14.6 |
| Packages | ape5.3, assertthat0.2.1, backports1.1.5, bitops1.0-6, caTools1.17.1.3, cli2.0.2, cluster2.1.0, codetools0.2-16, colorspace1.4-1, cowplot1.0.0, crayon1.3.4, data.table1.12.6, digest0.6.25, dplyr0.8.3, evaluate0.14, fansi0.4.0, farver2.0.1, fitdistrplus1.0-14, future1.15.1, future.apply1.3.0, gdata2.18.0, ggplot23.2.1, ggrepel0.8.1, ggridges0.5.1, ggsci2.9, globals0.12.5, glue1.4.1, gplots3.0.1.1, gridExtra2.3, gtable0.3.0, gtools3.8.1, highr0.8, hms0.5.2, htmltools0.4.0, htmlwidgets1.5.1, httr1.4.1, ica1.0-2, igraph1.2.4.2, irlba2.3.3, jsonlite1.6.1, kableExtra1.1.0, KernSmooth2.23-16, knitr1.26, labeling0.3, lattice0.20-38, lazyeval0.2.2, leiden0.3.1, lifecycle0.1.0, listenv0.8.0, lmtest0.9-37, lsei1.2-0, magrittr1.5, MASS7.3-51.4, Matrix1.2-18, munsell0.5.0, nlme3.1-142, npsurv0.4-0, patchwork1.0.0, pbapply1.4-2, pillar1.4.2, pkgconfig2.0.3, plotly4.9.1, plyr1.8.4, png0.1-7, purrr0.3.3, R.methodsS31.7.1, R.oo1.23.0, R.utils2.9.2, R62.4.1, RANN2.6.1, RColorBrewer1.1-2, Rcpp1.0.3, RcppAnnoy0.0.14, RcppParallel4.4.4, readr1.3.1, reshape21.4.3, reticulate1.13, rlang0.4.6, rmarkdown1.18, ROCR1.0-7, RSpectra0.16-0, rstudioapi0.11, rsvd1.0.2, Rtsne0.15, rvest0.3.5, scales1.1.0, sctransform0.2.0, sessioninfo1.1.1, Seurat3.1.5, stringi1.4.3, stringr1.4.0, survival3.1-8, tibble2.1.3, tidyr1.0.0, tidyselect0.2.5, tsne0.1-3, uwot0.1.5, vctrs0.2.0, viridisLite0.3.0, webshot0.5.2, withr2.1.2, xfun0.11, xml21.2.2, yaml2.2.0, zeallot0.1.0, zoo1.8-6 |